library(ggplot2)
library(stringr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd("~/Documents/Stage_NB/data/maf001_LD03")
# fichiers pour SeqApiPop 629 échantillons et filtre maf001
#eigenvec_refpop <- read.table("SeqApiPop_629_maf001_acp.eigenvec", header = F)
#eigenval_refpop <- read.table("SeqApiPop_629_maf001_acp.eigenval", header = F)
# fichiers pour SeqApiPop 629 échantillons et filtre maf001 + LD pruning = 0.3 (fenêtre de 1749 SNPs et pas de 175 bp)
eigenvec_refpop <- read.table("SeqApiPop_629_maf001_LD03_acp.eigenvec", header = F)
eigenval_refpop <- read.table("SeqApiPop_629_maf001_LD03_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_refpop)[colnames(eigenvec_refpop) == "V2"] <- "name"
eigenvec_refpop_seq_api_labels <- merge(eigenvec_refpop, seq_api_labels, by = "name")
eigen_percent_refpop <- round((eigenval_refpop / (sum(eigenval_refpop) )*100),2)
# Clustering hiérarchique
# Tree
setwd("~/Documents/Stage_NB/data/maf001_LD03")
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD03_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# ACP
# Extraction des 301 individus des populations de référence
eigenvec_refpop_seq_api_labels <- eigenvec_refpop_seq_api_labels[eigenvec_refpop_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_refpop_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_refpop_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_refpop_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_refpop_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_refpop_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_refpop$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
# ACP avec variance expliquée
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.44, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.083, xmax = 0.05, ymin = -0.11, ymax = -0.04)
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.44, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses de seuil de confiance 0.97 autour des points selon la variable Label
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
geom_text(aes(x = 0.045, y = 0.04, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = 0.05, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = -0.16, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.44, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# ellipses avec couleur
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
geom_text(aes(x = 0.045, y = 0.04, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = 0.05, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = -0.16, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.44, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.56, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.04, xmax = -0.02, ymin = -0.16, ymax = -0.06)
# LD02
setwd("~/Documents/Stage_NB/data/maf001_LD02")
eigenvec_LD02 <- read.table("SeqApiPop_629_maf001_LD02_acp.eigenvec", header = F)
eigenval_LD02 <- read.table("SeqApiPop_629_maf001_LD02_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD02)[colnames(eigenvec_LD02) == "V2"] <- "name"
eigenvec_LD02_seq_api_labels <- merge(eigenvec_LD02, seq_api_labels, by = "name")
eigen_percent_LD02 <- round((eigenval_LD02 / (sum(eigenval_LD02) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD02_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_LD02_seq_api_labels <- eigenvec_LD02_seq_api_labels[eigenvec_LD02_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD02_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD02_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD02_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD02_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD02_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD02$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_LD02_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.105, xmax = 0.06, ymin = -0.01, ymax = 0.07)
ggplot(data = eigenvec_LD02_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD02_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD02_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
ggplot(data = eigenvec_LD02_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.13, xmax = -0.18, ymin = 0.03, ymax = 0.08)
# LD01
setwd("~/Documents/Stage_NB/data/maf001_LD01")
eigenvec_LD01 <- read.table("SeqApiPop_629_maf001_LD01_acp.eigenvec", header = F)
eigenval_LD01 <- read.table("SeqApiPop_629_maf001_LD01_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD01)[colnames(eigenvec_LD01) == "V2"] <- "name"
eigenvec_LD01_seq_api_labels <- merge(eigenvec_LD01, seq_api_labels, by = "name")
eigen_percent_LD01 <- round((eigenval_LD01 / (sum(eigenval_LD01) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD01_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
#heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_LD01_seq_api_labels <- eigenvec_LD01_seq_api_labels[eigenvec_LD01_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD01_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD01_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD01_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD01_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD01_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD01$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
# ACP
ggplot(data = eigenvec_LD01_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.13, xmax = 0.08, ymin = -0.02, ymax = 0.06)
ggplot(data = eigenvec_LD01_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD01_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD01_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.05, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_LD01_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.5, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.09, xmax = 0.135, ymin = -0.05, ymax = 0.01)
# LD005
setwd("~/Documents/Stage_NB/data/maf001_LD005")
eigenvec_LD005 <- read.table("SeqApiPop_629_maf001_LD005_acp.eigenvec", header = F)
eigenval_LD005 <- read.table("SeqApiPop_629_maf001_LD005_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD005)[colnames(eigenvec_LD005) == "V2"] <- "name"
eigenvec_LD005_seq_api_labels <- merge(eigenvec_LD005, seq_api_labels, by = "name")
eigen_percent_LD005 <- round((eigenval_LD005 / (sum(eigenval_LD005) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD005_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_LD005_seq_api_labels <- eigenvec_LD005_seq_api_labels[eigenvec_LD005_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD005_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD005_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD005_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD005_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD005_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD005$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_LD005_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.26, 0.64), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.077, xmax = 0.042, ymin = 0.015, ymax = 0.08)
ggplot(data = eigenvec_LD005_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.26, 0.64), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD005_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.25, 0.64), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD005_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.25, 0.64), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_LD005_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.06, xmax = 0.11, ymin = -0.06, ymax = 0)
# LD004
setwd("~/Documents/Stage_NB/data/maf001_LD004")
eigenvec_LD004 <- read.table("SeqApiPop_629_maf001_LD004_acp.eigenvec", header = F)
eigenval_LD004 <- read.table("SeqApiPop_629_maf001_LD004_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD004)[colnames(eigenvec_LD004) == "V2"] <- "name"
eigenvec_LD004_seq_api_labels <- merge(eigenvec_LD004, seq_api_labels, by = "name")
eigen_percent_LD004 <- round((eigenval_LD004 / (sum(eigenval_LD004) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD004_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# ACP
#filter 629 -> 301 RefPop
eigenvec_LD004_seq_api_labels <- eigenvec_LD004_seq_api_labels[eigenvec_LD004_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD004_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD004_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD004_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD004_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD004_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD004$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_LD004_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.32, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.068, xmax = 0.042, ymin = -0.03, ymax = -0.11)
ggplot(data = eigenvec_LD004_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.32, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD004_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.42, 0.06), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD004_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.42, 0.06), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_LD004_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.03, xmax = 0.09, ymin = 0.06, ymax = 0.12)
# LD003
setwd("~/Documents/Stage_NB/data/maf001_LD003")
eigenvec_LD003 <- read.table("SeqApiPop_629_maf001_LD003_acp.eigenvec", header = F)
eigenval_LD003 <- read.table("SeqApiPop_629_maf001_LD003_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD003)[colnames(eigenvec_LD003) == "V2"] <- "name"
eigenvec_LD003_seq_api_labels <- merge(eigenvec_LD003, seq_api_labels, by = "name")
eigen_percent_LD003 <- round((eigenval_LD003 / (sum(eigenval_LD003) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD003_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_LD003_seq_api_labels <- eigenvec_LD003_seq_api_labels[eigenvec_LD003_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD003_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD003_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD003_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD003_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD003_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD003$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_LD003_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.035, xmax = 0.065, ymin = -0.04, ymax = -0.11)
ggplot(data = eigenvec_LD003_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD003_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD003_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_LD003_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.05, xmax = 0.1, ymin = 0.08, ymax = 0.16)
# LD001
setwd("~/Documents/Stage_NB/data/maf001_LD001")
eigenvec_LD001 <- read.table("SeqApiPop_629_maf001_LD001_acp.eigenvec", header = F)
eigenval_LD001 <- read.table("SeqApiPop_629_maf001_LD001_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_LD001)[colnames(eigenvec_LD001) == "V2"] <- "name"
eigenvec_LD001_seq_api_labels <- merge(eigenvec_LD001, seq_api_labels, by = "name")
eigen_percent_LD001 <- round((eigenval_LD001 / (sum(eigenval_LD001) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_maf001_LD001_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_LD001_seq_api_labels <- eigenvec_LD001_seq_api_labels[eigenvec_LD001_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_LD001_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_LD001_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_LD001_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_LD001_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_LD001_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_LD001$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_LD001_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.035, xmax = 0.065, ymin = -0.04, ymax = -0.11)
ggplot(data = eigenvec_LD001_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_LD001_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_LD001_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_LD001_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.04, xmax = 0, ymin = 0.08, ymax = 0.15)
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_maf001_LD03")
eigenvec_refpop <- read.table("SeqApiPop_561_maf001_LD03_acp.eigenvec", header = F)
eigenval_refpop <- read.table("SeqApiPop_561_maf001_LD03_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_refpop)[colnames(eigenvec_refpop) == "V2"] <- "name"
eigenvec_refpop_seq_api_labels <- merge(eigenvec_refpop, seq_api_labels, by = "name")
eigen_percent_refpop <- round((eigenval_refpop / (sum(eigenval_refpop) )*100),2)
# Clustering hiérarchique
# Tree
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_maf001_LD03")
matrice_app_refpop <- read.table("SeqApiPop_561_maf001_LD03_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_refpop_seq_api_labels <- eigenvec_refpop_seq_api_labels[eigenvec_refpop_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_refpop_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_refpop_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_refpop_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_refpop_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_refpop_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_refpop$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.49, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.105, xmax = 0.07, ymin = -0.11, ymax = -0.04)
# ellipses autour des points selon Label
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
geom_text(aes(x = 0.05, y = 0.04, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = 0.05, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = -0.16, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.49, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
geom_text(aes(x = 0.045, y = 0.04, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = 0.05, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.02, y = -0.16, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.49, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ACP
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.11, xmax = -0.2, ymin = 0.07, ymax = 0.02)
# ellipses autour des points selon Label
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_refpop_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.05, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
setwd("~/Documents/Stage_NB/data/SeqApiPop_629_SNPsBeeMuSe")
# SNPsBeeMuSe filtrés
eigenvec_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_acp.eigenvec", header = F)
eigenval_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_SNPsBeeMuSe)[colnames(eigenvec_SNPsBeeMuSe) == "V2"] <- "name"
eigenvec_SNPsBeeMuSe_seq_api_labels <- merge(eigenvec_SNPsBeeMuSe, seq_api_labels, by = "name")
eigen_percent_SNPsBeeMuSe <- round((eigenval_SNPsBeeMuSe / (sum(eigenval_SNPsBeeMuSe) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_SNPsBeeMuSe_seq_api_labels <- eigenvec_SNPsBeeMuSe_seq_api_labels[eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_SNPsBeeMuSe$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.035, xmax = 0.065, ymin = -0.04, ymax = -0.11)
#ellipses autour des points selon Label
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_y_reverse() +
theme(legend.position = c(0.53, 0.7), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.046, xmax = -0.024, ymin = 0.08, ymax = 0.02)
setwd("~/Documents/Stage_NB/data/SeqApiPop_629_SNPsBeeMuSe")
eigenvec_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_acp.eigenvec", header = F)
eigenval_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_SNPsBeeMuSe)[colnames(eigenvec_SNPsBeeMuSe) == "V2"] <- "name"
eigenvec_SNPsBeeMuSe_seq_api_labels <- merge(eigenvec_SNPsBeeMuSe, seq_api_labels, by = "name")
eigen_percent_SNPsBeeMuSe <- round((eigenval_SNPsBeeMuSe / (sum(eigenval_SNPsBeeMuSe) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_SNPsBeeMuSe_seq_api_labels <- eigenvec_SNPsBeeMuSe_seq_api_labels[eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_SNPsBeeMuSe$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.035, xmax = 0.065, ymin = -0.04, ymax = -0.11)
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.5, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.025, xmax = 0.05, ymin = 0.02, ymax = 0.11)
setwd("~/Documents/Stage_NB/data/SeqApiPop_629_SNPsBeeMuSe")
eigenvec_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_default_acp.eigenvec", header = F)
eigenval_SNPsBeeMuSe <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_default_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_SNPsBeeMuSe)[colnames(eigenvec_SNPsBeeMuSe) == "V2"] <- "name"
eigenvec_SNPsBeeMuSe_seq_api_labels <- merge(eigenvec_SNPsBeeMuSe, seq_api_labels, by = "name")
eigen_percent_SNPsBeeMuSe <- round((eigenval_SNPsBeeMuSe / (sum(eigenval_SNPsBeeMuSe) )*100),2)
# Clustering hiérarchique
# Tree
matrice_app_refpop <- read.table("SeqApiPop_629_SNPsBeeMuSe_filtered_maf001_LD03_default_acp.rel", header = FALSE)
dist_matrice_refpop <- dist(matrice_app_refpop)
hc_refpop <- hclust(dist_matrice_refpop, method = "ward.D2")
plot(hc_refpop)
# Extraction des 301 individus des populations de référence
eigenvec_SNPsBeeMuSe_seq_api_labels <- eigenvec_SNPsBeeMuSe_seq_api_labels[eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label2 <- c("black", "mediumpurple4", "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_SNPsBeeMuSe$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.035, xmax = 0.065, ymin = -0.04, ymax = -0.11)
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.34, 0.02), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses autour des points selon Label
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.5, 0.04), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.5, 0.04), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1, "lines"),
legend.text = element_text(size = 10)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
ggplot(data = eigenvec_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label2,
breaks = c("Ouessant Conservatory", "Colonsay Conservatory", "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Mellifera Ouessant", "Mellifera Colonsay", "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.5, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.025, xmax = 0.05, ymin = 0.02, ymax = 0.11)
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_SNPsBeeMuse_LD03")
eigenvec_561_SNPsBeeMuSe <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_acp.eigenvec", header = F)
eigenval_561_SNPsBeeMuSe <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_561_SNPsBeeMuSe)[colnames(eigenvec_561_SNPsBeeMuSe) == "V2"] <- "name"
eigenvec_561_SNPsBeeMuSe_seq_api_labels <- merge(eigenvec_561_SNPsBeeMuSe, seq_api_labels, by = "name")
eigen_percent_561_SNPsBeeMuSe <- round((eigenval_561_SNPsBeeMuSe / (sum(eigenval_561_SNPsBeeMuSe) )*100),2)
# Clustering hiérarchique
# Tree
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_SNPsBeeMuse_LD03")
matrice_app_561_default <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_acp.rel", header = FALSE)
dist_matrice_561_default <- dist(matrice_app_561_default)
hc_561_d <- hclust(dist_matrice_561_default, method = "ward.D2")
plot(hc_561_d)
#h eatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_561_SNPsBeeMuSe_seq_api_labels <- eigenvec_561_SNPsBeeMuSe_seq_api_labels[eigenvec_561_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label <- c("mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_561_SNPsBeeMuSe$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.085, xmax = 0.045, ymin = -0.12, ymax = -0.05)
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.48, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.025, xmax = 0.05, ymin = -0.04, ymax = -0.12)
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_SNPsBeeMuSe_LD_default")
eigenvec_561_SNPsBeeMuSe <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_default_pruned_acp.eigenvec", header = F)
eigenval_561_SNPsBeeMuSe <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_default_pruned_acp.eigenval", header = F)
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
colnames(eigenvec_561_SNPsBeeMuSe)[colnames(eigenvec_561_SNPsBeeMuSe) == "V2"] <- "name"
eigenvec_561_SNPsBeeMuSe_seq_api_labels <- merge(eigenvec_561_SNPsBeeMuSe, seq_api_labels, by = "name")
eigen_percent_561_SNPsBeeMuSe <- round((eigenval_561_SNPsBeeMuSe / (sum(eigenval_561_SNPsBeeMuSe) )*100),2)
# Clustering hiérarchique
# Tree
setwd("~/Documents/Stage_NB/data/SeqApiPop_561_SNPsBeeMuSe_LD_default")
matrice_app_561_default <- read.table("SeqApiPop_561_SNPsBeeMuSe_filtered_maf001_LD03_default_pruned_acp.rel", header = FALSE)
dist_matrice_561_default <- dist(matrice_app_561_default)
hc_561_d <- hclust(dist_matrice_561_default, method = "ward.D2")
plot(hc_561_d)
# heatmap
#heatplot(as.matrix(dist(matrice_app_refpop,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Extraction des 301 individus des populations de référence
eigenvec_561_SNPsBeeMuSe_seq_api_labels <- eigenvec_561_SNPsBeeMuSe_seq_api_labels[eigenvec_561_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_561_SNPsBeeMuSe_seq_api_labels$GeneticOrigin != 'Buckfast', ]
custom_colors_label <- c("mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen")
lambda <- eigenval_561_SNPsBeeMuSe$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
# ACP avec variance expliquée
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.085, xmax = 0.045, ymin = -0.12, ymax = -0.05)
# ellipses autour des points selon Label
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
geom_text(aes(x = 0.05, y = 0.04, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = 0.05, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = -0.15, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
geom_text(aes(x = 0.045, y = 0.04, label = "M-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = 0.05, label = "C-lineage"), size = 4, color = "black") +
geom_text(aes(x = -0.01, y = -0.15, label = "O-lineage"), size = 4, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC1", y = "PC2") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.03), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2)) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.03, xmax = 0.055, ymin = -0.05, ymax = -0.15)
# ellipses autour des points selon Label
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# ellipses avec couleur
ggplot(data = eigenvec_561_SNPsBeeMuSe_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point() +
stat_ellipse(aes(group = Label, fill = Label), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot - reference populations", x = "PC3", y = "PC4") +
scale_color_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
scale_fill_manual(values = custom_colors_label,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia")) +
theme(legend.position = c(0.45, 0.05), legend.justification = c(0, 0),
legend.background = element_rect(fill = "transparent"),
legend.key.size = unit(1.2, "lines"),
legend.text = element_text(size = 11)) +
guides(color = guide_legend(override.aes = list(size = 3.5), ncol = 2))
# Chargement des données
eigenvec <- read.table("BeeMuse_filtered.eigenvec", header = F)
eigenval <- read.table("BeeMuse_filtered.eigenval", header = F)
colnames(eigenvec)[colnames(eigenvec) == "V1"] <- "Sample"
colnames(eigenvec)[colnames(eigenvec) == "V2"] <- "Filename"
Corres_ID_E756 <- read.table("/home/nbettembourg/Documents/Stage_NB/data/BeeMuSe_Wageningen_E756_17_03_2023_Alain/BeeMuSe_Wageningen_E756_17_03_2023_Alain/Corres_ID_E756.txt",header=F)
input_pedigree_BeeMuSe <- read.table("/home/nbettembourg/Documents/Stage_NB/data/BeeMuSe_Wageningen_E756_17_03_2023_Alain/BeeMuSe_Wageningen_E756_17_03_2023_Alain/input-pedigree_BeeMuSe.csv",header=T,sep=";")
# Correspondance pedigree - échantillons BeeMuSe
# extraire 'Pool' et '-100' adin d'obtenir 'Pool-100'
eigenvec$name <- paste(sub("Beemuse_", "", eigenvec$Sample),
sub("_(.*?)\\..*", "\\1", eigenvec$Filename),
sep = "-")
eigenvec$name <- str_extract(eigenvec$name, "[A-Za-z0-9]+-[0-9]+")
colnames(Corres_ID_E756)[colnames(Corres_ID_E756) == "V1"] <- "name"
colnames(Corres_ID_E756)[colnames(Corres_ID_E756) == "V2"] <- "ID_1a"
Corres_ID_E756$ID_1a <- gsub("o", "_", Corres_ID_E756$ID_1a)
# Remplacement de tous les "o" par "_" sauf quand présent dans un mot
Corres_ID_E756$ID_1a <- gsub("Pers_", "Perso", Corres_ID_E756$ID_1a)
Corres_ID_E756$ID_1a <- gsub("L_c", "Loc", Corres_ID_E756$ID_1a)
Corres_ID_E756_eigenvec <- merge(eigenvec, Corres_ID_E756, by = 'name')
merged_3 <- merge(Corres_ID_E756_eigenvec, input_pedigree_BeeMuSe, by = 'ID_1a')
colnames(merged_3)[colnames(merged_3) == "V3.x"] <- "V3"
ind2pop_ID_2a = subset(merged_3, select = c(name, ID_2a))
id_counts <- as.data.frame(table(merged_3$ID_2a))
names(id_counts) <- c("ID_2a", "Occurrence")
# Importation des données de matrice d'apparentement dans R et visualisation par clustering hiérarchique
matrice_app <- read.table("plink2.rel", header = FALSE)
dist_matrice <- dist(matrice_app)
hc <- hclust(dist_matrice, method = "ward.D2")
plot(hc)
#heatplot(as.matrix(dist(matrice_app,diag=T)), cols.default = F, lowcol = 'blue', highcol='yellow', dualScale = F, scale='none', method='ward.D2')
# Proportion de la variance expliquée
eigen_percent <- round((eigenval / (sum(eigenval) )*100),2)
lambda <- eigenval$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
# ACP - Sample : plaque de provenance de génotypage des échantillons
ggplot(eigenvec, aes(x = V3, y = V4, label = Sample, color = Sample)) +
geom_point() +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# ACP - ID_breeder : identification de l'apiculteur de chez qui proviennent les abeilles
ggplot(merged_3, aes(x = V3, y = V4, label = ID_breeder, color = ID_breeder)) +
geom_point() +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# ACP - ID_2a : pedigree, même reine mère des reines génotypées
ggplot(merged_3, aes(x = V3, y = V4, label = ID_2a, color = ID_2a)) +
geom_point(size = 1) +
scale_color_manual(values = c("#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")) +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.1, xmax = -0.16, ymin = 0.05, ymax = -0.01)
ggplot(merged_3, aes(x = V3, y = V4, label = ID_2a)) +
geom_point(size = 1, color = "red") +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.1, xmax = -0.16, ymin = 0.05, ymax = -0.01)
# Visualisaiton ACP PC1 PC2 avec ellipses de seuil de confiance de 0.97
ggplot(merged_3, aes(x = V3, y = V4, label = ID_2a, color = ID_2a)) +
geom_point(size = 0.8) +
stat_ellipse(aes(group = ID_2a), geom = "polygon", level = 0.97, alpha = 0, size = 0.1, color = "black") +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
# 15 familles bien regroupés entre elles
ggplot(subset(merged_3, ID_2a %in% c("BAH_20-19", "BBS_6-19","BER_11-19", "BLS_53-19","BHA_2-20", "KLSU_14-19", "MM_31-20", "MP_10-20", "PersoLD_2021", "PersoLD_2022", "S_GZ_2-19", "SJ_16-20", "SJ_24-20", "TL_13-20", "TL_19-20")), aes(x = V3, y = V4, color = ID_2a)) +
geom_point(size = 1) +
stat_ellipse(aes(group = ID_2a, fill = ID_2a), geom = "polygon", level = 0.97, alpha = 0.15, size = 0.15, color = "black") +
#stat_ellipse(aes(group = ID_2a), geom = "polygon", level = 0.97, alpha = 0, size = 0.1, color = "black") +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
# 14 familles moins bien regroupées entre elles
ggplot(subset(merged_3, ID_2a %in% c( "BH_44","BH_7-19", "ER_13-19","MM_37-20", "KBJ_1-19", "KLoc_37-19","KBru_6-20", "PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoUB_2021", "PersoUB_2022", "SJ_30-20","SBJ_3-19")), aes(x = V3, y = V4, color = ID_2a)) +
geom_point(size = 1) +
stat_ellipse(aes(group = ID_2a, fill = ID_2a), geom = "polygon", level = 0.97, alpha = 0.15, size = 0.15, color = "black") +
#stat_ellipse(aes(group = ID_2a), geom = "polygon", level = 0.97, alpha = 0, size = 0.1, color = "black") +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# ACP - Sample (plaques)
ggplot(merged_3, aes(x = V3, y = V4, label = Sample, color = Sample)) +
geom_point() +
# stat_ellipse(aes(group = Sample), geom = "polygon", level = 0.97, alpha = 0, size = 0.2, color = "black") +
# stat_ellipse(aes(group = Sample, fill = Sample), geom = "polygon", level = 0.97, alpha = 0.2, size = 0.2, color = "black") +
labs(title = "PCA Plot", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# 748 -> 681 -> 612 échantillons
values_in_A_not_in_B <- eigenvec$name[!(eigenvec$name %in% merged_3$name)]
length(values_in_A_not_in_B)
## [1] 136
# ACP PC3 PC4 - ID_2a : reines mères des reines génotypées
ggplot(merged_3, aes(x = V5, y = V6, label = ID_2a, color = ID_2a)) +
geom_point(size = 1) +
scale_color_manual(values = c("#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")) +
labs(title = "PCA Plot - BeeMuSe", x = "PC3", y = "PC4") +
theme(legend.position = "right") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.055, xmax = -0.09, ymin = -0.04, ymax = -0.1)
# 15 bien regroupés
ggplot(subset(merged_3, ID_2a %in% c("BAH_20-19", "BBS_6-19","BER_11-19", "BLS_53-19","BHA_2-20", "KLSU_14-19", "MM_31-20", "MP_10-20", "PersoLD_2021", "PersoLD_2022", "S_GZ_2-19", "SJ_16-20", "SJ_24-20", "TL_13-20", "TL_19-20")), aes(x = V5, y = V6, color = ID_2a)) +
geom_point(size = 1) +
stat_ellipse(aes(group = ID_2a, fill = ID_2a), geom = "polygon", level = 0.97, alpha = 0.15, size = 0.15, color = "black") +
#stat_ellipse(aes(group = ID_2a), geom = "polygon", level = 0.97, alpha = 0, size = 0.1, color = "black") +
labs(title = "PCA Plot - BeeMuSe", x = "PC3", y = "PC4") +
theme(legend.position = "right")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
# 14 moins bien regroupées entre elles
ggplot(subset(merged_3, ID_2a %in% c( "BH_44","BH_7-19", "ER_13-19","MM_37-20", "KBJ_1-19", "KLoc_37-19","KBru_6-20", "PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoUB_2021", "PersoUB_2022", "SJ_30-20","SBJ_3-19")), aes(x = V5, y = V6, color = ID_2a)) +
geom_point(size = 1) +
stat_ellipse(aes(group = ID_2a, fill = ID_2a), geom = "polygon", level = 0.97, alpha = 0.15, size = 0.15, color = "black") +
#stat_ellipse(aes(group = ID_2a), geom = "polygon", level = 0.97, alpha = 0, size = 0.1, color = "black") +
labs(title = "PCA Plot - BeeMuSe", x = "PC3", y = "PC4") +
theme(legend.position = "right")
## Warning in MASS::cov.trob(data[, vars]): Probable convergence failure
ggplot(merged_3, aes(x = V5, y = V6, label = ID_2a)) +
geom_point(size = 1, color = "red") +
labs(title = "PCA Plot - BeeMuSe", x = "PC3", y = "PC4") +
theme(legend.position = "right") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = -0.055, xmax = -0.09, ymin = -0.04, ymax = -0.1)
# BEEMUSE - ID_2a
beemuse_samples_ID_2a <- merged_3[, c("Sample", "Filename", "ID_2a")]
nom_fichier <- "beemuse_samples_ID_2a.txt"
write.table(beemuse_samples_ID_2a, file = nom_fichier, sep = "\t", row.names = FALSE,quote=FALSE)
# FAM MERGED SEQAPIPOP BEEMUSE 561
merged_fam <- read.table("merged_BeeMuSe_SeqApiPop_561_filtered_maf001_LD_default.fam", sep = " ", stringsAsFactors = FALSE)
merged_fam <- merged_fam[, 1:2]
colnames(merged_fam)[colnames(merged_fam) == "V1"] <- "Sample"
colnames(merged_fam)[colnames(merged_fam) == "V2"] <- "Filename"
# Créer une nouvelle colonne dans le fichier 2 pour stocker les valeurs de la troisième colonne
merged_fam$ID_2a <- NA
# Parcourir les lignes du fichier 2
for (i in 1:nrow(merged_fam)) {
# Vérifier si les valeurs des colonnes Sample et Filename du fichier 2 sont présentes dans le fichier 1
correspondance <- beemuse_samples_ID_2a$ID_2a[beemuse_samples_ID_2a$Sample == merged_fam$Sample[i] & beemuse_samples_ID_2a$Filename == merged_fam$Filename[i]]
# Si une correspondance exacte est trouvée, ajouter la valeur correspondante de la colonne "ID_2a" du fichier 1 au fichier 2
if (length(correspondance) > 0) {
merged_fam$ID_2a[i] <- as.character(correspondance[1]) # Utilisez la correspondance trouvée
} else {
# Sinon, ajouter "Beemuse" dans la troisième colonne du fichier 2
merged_fam$ID_2a[i] <- "Beemuse"
}
}
# Créer fichier de correspondance entre les échantillons de BeeMuSe ID_2a avec le fichier .fam
correspondance <- merged_fam[, c("Sample","Filename","ID_2a")]
nom_fichier <- "correspondance_samples.txt"
write.table(correspondance, file = nom_fichier, sep = "\t", row.names = FALSE,quote=FALSE)
setwd("~/Documents/Stage_NB/data/merged_data_1055_561_not_supervised")
eigenvec_merged_maf001_LD03 <- read.table("merged_BeeMuSe_SeqApiPop_561_filtered_maf001_LD_default_acp.eigenvec", header = F)
eigenval_merged_maf001_LD03 <- read.table("merged_BeeMuSe_SeqApiPop_561_filtered_maf001_LD_default_acp.eigenval", header = F)
colnames(eigenvec_merged_maf001_LD03)[colnames(eigenvec_merged_maf001_LD03) == "V1"] <- "Sample"
colnames(eigenvec_merged_maf001_LD03)[colnames(eigenvec_merged_maf001_LD03) == "V2"] <- "name"
Corres_ID_E756 <- read.table("/home/nbettembourg/Documents/Stage_NB/data/BeeMuSe_Wageningen_E756_17_03_2023_Alain/BeeMuSe_Wageningen_E756_17_03_2023_Alain/Corres_ID_E756.txt",header=F)
input_pedigree_BeeMuSe <- read.table("/home/nbettembourg/Documents/Stage_NB/data/BeeMuSe_Wageningen_E756_17_03_2023_Alain/BeeMuSe_Wageningen_E756_17_03_2023_Alain/input-pedigree_BeeMuSe.csv",header=T,sep=";")
seq_api_labels <- read.csv("~/Documents/Stage_NB/data/SeqApiPop_labels.csv")
eigenvec_merged_maf001_LD03_seq_api_labels <- merge(eigenvec_merged_maf001_LD03, seq_api_labels, by = "name")
eigen_percent <- round((eigenval_merged_maf001_LD03 / (sum(eigenval_merged_maf001_LD03) )*100),2)
lambda <- eigenval_merged_maf001_LD03$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
# Extraction des 301 individus des populations de référence
eigenvec_merged_maf001_LD03_seq_api_labels <- eigenvec_merged_maf001_LD03_seq_api_labels[eigenvec_merged_maf001_LD03_seq_api_labels$GeneticOrigin != 'Unknown' &
eigenvec_merged_maf001_LD03_seq_api_labels$Label != 'Ariege Conservatory' &
eigenvec_merged_maf001_LD03_seq_api_labels$Label != 'Brittany Conservatory' &
eigenvec_merged_maf001_LD03_seq_api_labels$UniqueInHive != 'Unknown' &
eigenvec_merged_maf001_LD03_seq_api_labels$UniqueInHive != 'Buckfast' &
eigenvec_merged_maf001_LD03_seq_api_labels$GeneticOrigin != 'Buckfast', ]
# Filtrer les lignes où les facteurs des colonnes Sample et name sont différents
eigenvec_BeeMuSe_748_Samples <- eigenvec_merged_maf001_LD03[as.character(eigenvec_merged_maf001_LD03$Sample) != as.character(eigenvec_merged_maf001_LD03$name), ]
# extraire 'Pool' et '-100' obtenir 'Pool-100'
eigenvec_merged_maf001_LD03$name <- paste(sub("Beemuse_", "", eigenvec_merged_maf001_LD03$Sample),
sub("_(.*?)\\..*", "\\1", eigenvec_merged_maf001_LD03$name),
sep = "-")
eigenvec_merged_maf001_LD03$name <- str_extract(eigenvec_merged_maf001_LD03$name, "[A-Za-z0-9]+-[0-9]+")
colnames(Corres_ID_E756)[colnames(Corres_ID_E756) == "V1"] <- "name"
colnames(Corres_ID_E756)[colnames(Corres_ID_E756) == "V2"] <- "ID_1a"
Corres_ID_E756$ID_1a <- gsub("o", "_", Corres_ID_E756$ID_1a)
# Remplacement de "o" par "_" sauf quand présent dans un mot
Corres_ID_E756$ID_1a <- gsub("Pers_", "Perso", Corres_ID_E756$ID_1a)
Corres_ID_E756$ID_1a <- gsub("L_c", "Loc", Corres_ID_E756$ID_1a)
Corres_ID_E756_eigenvec <- merge(eigenvec_merged_maf001_LD03, Corres_ID_E756, by = 'name')
eigenvec_merged_Corres_ID_E756_pedigree <- merge(Corres_ID_E756_eigenvec, input_pedigree_BeeMuSe, by = 'ID_1a')
colnames(eigenvec_merged_Corres_ID_E756_pedigree)[colnames(eigenvec_merged_Corres_ID_E756_pedigree) == "V3.x"] <- "V3"
# ACP - merged data
lambda <- eigenval_merged_maf001_LD03$V1
variance_proportion <- lambda / sum(lambda)
variance_df <- data.frame(PC = seq_along(variance_proportion), Variance = variance_proportion)
ggplot(eigenvec_merged_maf001_LD03, aes(x = V3, y = V4)) +
geom_point() +
scale_x_reverse() +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.065, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# ACP plot - BeeMuSe 748 & SeqApiPop 301 RefPop
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = "SeqApiPop")) +
geom_point() +
scale_x_reverse()
# Ajout ACP pour BeeMuSe
plot2 <- plot1 + geom_point(data = eigenvec_BeeMuSe_748_Samples, aes(x = V3, y = V4, color = "BeeMuSe")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop",x = "PC1", y = "PC2")
# Ajout de la légende
plot2 + scale_color_manual(name = "Dataset",
values = c(SeqApiPop = "black", BeeMuSe = "red"),
labels = c(SeqApiPop = "SeqApiPop 301 RefPop", BeeMuSe = "BeeMuSe 748 Samples")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# ACP plot - BeeMuSe 612 Pedigree & SeqApiPop 301 RefPop
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = "SeqApiPop")) +
geom_point() +
scale_x_reverse()
# Ajout ACP pour BeeMuSe
plot2 <- plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V3, y = V4, color = "BeeMuSe")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop",x = "PC1", y = "PC2")
# Ajout de la légende
plot2 + scale_color_manual(name = "Dataset",
values = c(SeqApiPop = "black", BeeMuSe = "red"),
labels = c(SeqApiPop = "SeqApiPop 301 RefPop", BeeMuSe = "BeeMuSe 612 Pedigree")) +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# Couleurs 3 lignées - Label
custom_colors_2 <- c( "black", "black", "black", "black", "orange", "orange", "orange", "orange", "orange", "orange", "mediumseagreen", "red")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
#scale_y_reverse() +
scale_color_manual(values = custom_colors_2,
breaks = c("Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia"))
# Ajout ACP pour BeeMuSe
plot1 + geom_point(data = eigenvec_BeeMuSe_748_Samples, aes(x = V3, y = V4), color = "red", size =1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# Couleurs des 11 populations de référence - Label
custom_colors <- c( "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen", "red")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point(size=1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia"))
# Ajout ACP pour BeeMuSe
plot1 + geom_point(data = eigenvec_BeeMuSe_748_Samples, aes(x = V3, y = V4), color = "red", size=1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# SeqApiPop - 301 Samples - BeeMuSe 612 Samples - ID_2a - colors
custom_colors <- c( "black", "black", "black",
"black", "black", "black", "black", "black", "black", "black",
"black",
"#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point(size=1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c( "Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France", "BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19", "KBJ_1-19", "KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia","BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19",
"KBJ_1-19","KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V3, y = V4, color = ID_2a),size=1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# SeqApiPop - 301 Samples - Label - colors - 3 lignées
custom_colors <- c( "black", "black", "black",
"black", "orange", "orange", "orange", "orange", "orange", "orange",
"mediumseagreen",
"#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point(size=1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c("Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France", "BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19", "KBJ_1-19", "KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia","BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19",
"KBJ_1-19","KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V3, y = V4, color = ID_2a),size=1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# SeqApiPop - 301 - Label - couleurs - 11 populations de référence
custom_colors <- c("mediumpurple3", "lightskyblue", "mediumvioletred",
"hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3",
"mediumseagreen",
"#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V3, y = V4, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c("Iberiensis Spain", "Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France", "BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19", "KBJ_1-19", "KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia","BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19",
"KBJ_1-19","KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
theme(legend.position = "right")
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V3, y = V4, color = ID_2a), size = 1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC1", y = "PC2") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.075, xmax = 0.04, ymin = -0.1, ymax = -0.05)
# ACP - PC3 / PC4
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors_2,
breaks = c("Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia"))
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_BeeMuSe_748_Samples, aes(x = V5, y = V6), color = "red", size = 1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC3", y = "PC4") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.097, xmax = 0.034, ymin = -0.1, ymax = -0.05)
# Couleurs 11 populations de référence
custom_colors <- c( "mediumpurple3", "lightskyblue", "mediumvioletred", "hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3", "mediumseagreen", "red")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c("Iberiensis Spain",
"Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France"),
labels = c( "Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia"))
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_BeeMuSe_748_Samples, aes(x = V5, y = V6), color = "red", size = 1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC3", y = "PC4") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.097, xmax = 0.034, ymin = -0.1, ymax = -0.05)
# Couleurs des 11 populations de référence SeqApiPop + 29 familles ID_2a
custom_colors <- c("mediumpurple3", "lightskyblue", "mediumvioletred",
"hotpink1", "yellow", "gold", "orange", "chocolate", "brown", "olivedrab3",
"mediumseagreen",
"#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c("Iberiensis Spain", "Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France", "BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19", "KBJ_1-19", "KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"),
labels = c("Iberiensis Spain",
"Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia","BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19",
"KBJ_1-19","KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20")) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC3", y = "PC4") +
theme(legend.position = "right")
# Ajouter ACP BeeMuSe
plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V5, y = V6, color = ID_2a), size = 1) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC3", y = "PC4") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.097, xmax = 0.034, ymin = -0.1, ymax = -0.05)
# Couleurs 3 lignées SeqApiPop + 29 familles ID_2a
custom_colors <- c( "black", "black", "black",
"black", "orange", "orange", "orange", "orange", "orange", "orange",
"mediumseagreen",
"#FF0000FF", "#FF3300FF", "#FF6600FF", "#FF9900FF", "#FFCC00FF",
"#FFFF00FF", "#CCFF00FF", "#99FF00FF", "#66FF00FF", "#33FF00FF",
"#00FF00FF", "#00FF33FF", "#00FF66FF", "#00FF99FF", "#00FFCCFF",
"#00FFFFFF", "#00CCFFFF", "#0099FFFF", "#0066FFFF", "#0033FFFF",
"#0000FFFF", "#3300FFFF", "#6600FFFF", "#9900FFFF", "#CC00FFFF",
"#FF00FFFF", "#FF00CCFF", "#FF0099FF", "#FF0066FF")
# ACP SeqApiPop
plot1 <- ggplot(eigenvec_merged_maf001_LD03_seq_api_labels, aes(x = V5, y = V6, color = Label)) +
geom_point(size = 1) +
scale_x_reverse() +
scale_color_manual(values = custom_colors,
breaks = c( "Iberiensis Spain", "Savoy Conservatory", "Porquerolles Conservatory", "Sollies Conservatory",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia France", "BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19", "KBJ_1-19", "KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"),
labels = c( "Iberiensis Spain", "Mellifera Savoy", "Mellifera Porquerolles", "Mellifera Sollies",
"Ligustica Italy", "Carnica Slovenia", "Carnica Germany",
"Carnica Switzerland", "Carnica France", "Carnica Poland", "Caucasia","BAH_20-19", "BBS_6-19", "BER_11-19", "BH_44", "BH_7-19", "BHA_2-20", "BLS_53-19", "ER_13-19",
"KBJ_1-19","KBru_6-20", "KLoc_37-19", "KLSU_14-19", "MM_31-20", "MM_37-20", "MP_10-20",
"PersoBC_2021", "PersoJLL_2021", "PersoJLL_2022", "PersoLD_2021", "PersoLD_2022", "PersoUB_2021",
"PersoUB_2022", "S_GZ_2-19", "SBJ_3-19", "SJ_16-20", "SJ_24-20", "SJ_30-20", "TL_13-20",
"TL_19-20"))
# Ajout ACP BeeMuSe
plot1 + geom_point(data = eigenvec_merged_Corres_ID_E756_pedigree, aes(x = V5, y = V6, color = ID_2a), size = 1 ) +
labs(title = "PCA Plot - BeeMuSe & SeqApiPop", x = "PC3", y = "PC4") +
annotation_custom(
ggplotGrob(
ggplot(variance_df, aes(x = factor(PC), y = Variance)) +
geom_bar(stat = "identity", fill = "steelblue", width = 0.8) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.position = "none")
), xmin = 0.097, xmax = 0.034, ymin = -0.1, ymax = -0.05)